* Create a flag for being near a plant or control site;


* [] brackets indicate redacted internal census variable name or directory that can't be disclosed. ;

* define libraries;
libname icf '[directory location for geocoded worker residence files]';
libname hannah  '/projects/users/########';
libname ben '/projects/users/########';
libname hantmp  '/projects/users/########/tempfiles';


* GET COUNTY BOUNDARY SHAPEFILES;
data countyboundaries2;
  set mapsgfk.us_counties 
  (where=(state in (01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56)));
  x = long;
  y = lat;
  ctyfips = substr(ID,4,5);
run;
data countyboundaries2;
  set countyboundaries2 (keep = ID ctyfips SEGMENT X Y);
run;
%CENTROID(countyboundaries2,hantmp.countycenters2,ctyfips);

proc import out = hantmp.cnty_level_ivs22
  datafile = "/projects/users/########/cnty_level_ivs2.dta"
  dbms = dta
  replace;
run;

data hantmp.cnty_level_ivs22;
  set hantmp.cnty_level_ivs22 
  (keep = ctyfips year zhat_cnty cond_exp_ycap cond_exp_ytnum meanspd);
run;

proc sql;
  create table hantmp.cnty_level_ivs22 
  as select
  a.*, b.*
  from hantmp.cnty_level_ivs22 as a
    left join hantmp.countycenters2 as b on a.ctyfips = b.ctyfips
  order by a.ctyfips, a.year;
quit;

* define macro "statecounty_wrkr_IVs" which does 100-mile buffer for certain years, states by county;
* id100, id80, etc. are made-up indicators for whether or not a unit is within an 80-mile or 100 mile distance band;
%macro statecounty_wrkr_IVs;
  %let stfip = 01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56;
  %local I next_fip;
  %let I = 1;
  %do %while (%scan(&stfip, &I) ne );
    %let next_fip = %scan(&stfip, &I);
      %do yr = 2000 %to 2015;
	*GET WORKER PIKS FOR A STATE/YEAR;
	*Create temp file of all piks residences in current state and year;
	DATA hantmp.icf_&next_fip._&yr;
	  SET icf.icf_us_addresses (KEEP = [county id] pik [year] [latitude] [longitude]);
	  IF SUBSTR([county id],1,2) = "&next_fip" AND [year] = &yr;
	  y = [latitude]/1000000;
	  x = [longitude]/1000000;
	  DROP [latitude] [longitude];
	RUN;

	*Flag observations inside the buffer;
	PROC GINSIDE 
	  DATA	= hantmp.icf_&next_fip._&yr
	  MAP 	= hannah.USmile100p
	  OUT	= hantmp.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hantmp.in_100m_&next_fip._&yr;
	set hantmp.in_100m_&next_fip._&yr (keep = x y _ID_ pik [year] [county id]);
	rename _ID_ = id100;
	run;
	PROC GINSIDE 
	  DATA	= hantmp.in_100m_&next_fip._&yr
	  MAP 	= hannah.USmile80p
	  OUT	= hantmp.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hantmp.in_100m_&next_fip._&yr;
	set hantmp.in_100m_&next_fip._&yr (keep = x y _ID_ id100 pik [year] [county id]);
	rename _ID_ = id80;
	run;
	PROC GINSIDE 
	  DATA	= hantmp.in_100m_&next_fip._&yr
	  MAP 	= hannah.USmile60p
	  OUT	= hantmp.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hantmp.in_100m_&next_fip._&yr;
	set hantmp.in_100m_&next_fip._&yr (keep = x y _ID_ id80 id100 pik [year] [county id]);
	rename _ID_ = id60;
	run;
	PROC GINSIDE 
	  DATA	= hantmp.in_100m_&next_fip._&yr
	  MAP 	= hannah.USmile40p
	  OUT	= hantmp.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hantmp.in_100m_&next_fip._&yr;
	set hantmp.in_100m_&next_fip._&yr (keep = x y _ID_ id60 id80 id100 pik [year] [county id]);
	rename _ID_ = id40;
	run;
	PROC GINSIDE 
	  DATA	= hantmp.in_100m_&next_fip._&yr
	  MAP 	= hannah.USmile20p
	  OUT	= hantmp.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hantmp.in_100m_&next_fip._&yr;
	set hantmp.in_100m_&next_fip._&yr (keep = x y _ID_ id40 id60 id80 id100 pik [year] [county id]);
	rename _ID_ = id20;
	run;

	* Code dummy for inside/outside the buffer, rename;
	DATA hantmp.in_100m_&next_fip._&yr;
	  SET hantmp.in_100m_&next_fip._&yr (keep = pik [year] [county id] x y id100);
	  IF CMISS(id100) THEN DELETE;
	  statef = &next_fip;
	RUN;

	* Now bring in predicted capacity data, make separate dataset just within the buffer;
	* Subset predicted capacity data to year and state (&yr, &next_fip);
	data hantmp.cnties_&yr;
	  set hantmp.cnty_level_ivs22;
	  IF year = &yr;
	run;

	* Now loop through counties in state &next_fip;
	%let sqlobs=0;
	%let cnties=;
	proc sql;
	  select distinct [county id] 
	    into :cnties 
	    separated by ' ' 
	    from hantmp.in_100m_&next_fip._&yr where not missing([county id]);
	quit;
	%let my_count = &sqlobs;
	%if &my_count gt 0 %then %do;
	%do ct = 1 %to %sysfunc(countw(&cnties));
	  %let next_ct = %scan(&cnties,&ct,%str( ));
	    data hantmp.in_100m_&next_fip._&yr._&next_ct;
	      set hantmp.in_100m_&next_fip._&yr;
	      if [county id] = &next_ct;
	    run;
	    proc sql; 
	      CREATE TABLE hantmp.hh_windcapsum_&next_fip._&yr._&next_ct AS
	      SELECT 
		A.*
		,E.*
		,GEODIST(A.y, A.x, E.y, E.x, 'M' ) AS dist_to_turb
	      FROM hantmp.in_100m_&next_fip._&yr._&next_ct AS a
		LEFT JOIN hantmp.cnties_&yr AS e ON statef
	      WHERE CALCULATED dist_to_turb <= 100
		AND CALCULATED dist_to_turb ^=.
	      ORDER BY A.pik, A.[year]; 
	    quit;
	    proc sql;
	      CREATE TABLE hantmp.hh_wcapsum_&next_fip._&yr._&next_ct AS
	      SELECT pik, [year], 
	      SUM(cond_exp_ycap) as caphat100,
	      SUM(cond_exp_ytnum) as turbhat100,
	      SUM(zhat_cnty) as zhatcnty100,
	      MEAN(meanspd) as meanspd100,
	      SUM(case when dist_to_turb<=80 then cond_exp_ycap else 0 end) as caphat80,
	      SUM(case when dist_to_turb<=80 then cond_exp_ytnum else 0 end) as turbhat80,
	      SUM(case when dist_to_turb<=80 then zhat_cnty else 0 end) as zhatcnty80,
	      MEAN(case when dist_to_turb<=80 then meanspd else . end) as meanspd80,
	      SUM(case when dist_to_turb<=60 then cond_exp_ycap else 0 end) as caphat60,
	      SUM(case when dist_to_turb<=60 then cond_exp_ytnum else 0 end) as turbhat60,
	      SUM(case when dist_to_turb<=60 then zhat_cnty else 0 end) as zhatcnty60,
	      MEAN(case when dist_to_turb<=60 then meanspd else . end) as meanspd60,
	      SUM(case when dist_to_turb<=40 then cond_exp_ycap else 0 end) as caphat40,
	      SUM(case when dist_to_turb<=40 then cond_exp_ytnum else 0 end) as turbhat40,
	      SUM(case when dist_to_turb<=40 then zhat_cnty else 0 end) as zhatcnty40,
	      MEAN(case when dist_to_turb<=40 then meanspd else . end) as meanspd40,
	      SUM(case when dist_to_turb<=20 then cond_exp_ycap else 0 end) as caphat20,
	      SUM(case when dist_to_turb<=20 then cond_exp_ytnum else 0 end) as turbhat20,
	      SUM(case when dist_to_turb<=20 then zhat_cnty else 0 end) as zhatcnty20,
	      MEAN(case when dist_to_turb<=20 then meanspd else . end) as meanspd20,
	      MAX([county id]) as [county id],
	      MAX(statef) as statef,
	      MAX(y) as y,
	      MAX(x) as x
	      FROM hantmp.hh_windcapsum_&next_fip._&yr._&next_ct
	      GROUP BY pik, [year]
	      ORDER BY pik, [year];
	    quit;
	    PROC APPEND base = hannah.all_piks_ivs_100p 
	      data = hantmp.hh_wcapsum_&next_fip._&yr._&next_ct;
	    RUN;
	    PROC DATASETS library = hantmp nolist;
	      delete in_100m_&next_fip._&yr._&next_ct hh_windcapsum_&next_fip._&yr._&next_ct hh_wcapsum_&next_fip._&yr._&next_ct;
	    RUN; QUIT;
	  %end;
	 
	PROC DATASETS library = hantmp nolist;
	  delete icf_&next_fip._&yr cnties_&yr ;
	RUN; QUIT;
	PROC DATASETS library = hantmp nolist;
	  delete in_100m_&next_fip._&yr;
	RUN; QUIT;
      %end;
    %end;
    %let I = %eval(&I + 1);
  %end;
%mend;


* remove any old copies of appended "base" dataset before running macro;
/*
PROC DATASETS library = hannah nolist;
  delete all_piks_ivs_100p;
RUN; QUIT;
*/

%statecounty_wrkr_IVs;



